The main data is provided in “winter.csv”, containing the following variables on all medal winners in all winter olympics from 1924 to 2014:
Year: year of the winter olympicsCity: city the olympics is heldSport: the type of sportDiscipline: a grouping of disciplinesEvent: the particular event / competitionAthlete: name of the athleteCountry: country origin of the athleteGender: gender of the athleteMedal: type of medal wonFor example, an event is a competition in a sport or discipline that gives rise to a ranking. Thus, skiing is a sport, while cross-country skiing, Alpine skiing, snowboarding, ski jumping and Nordic combined are disciplines. Alpine skiing is a discipline, while the super-G, giant slalom, slalom and combined are events.
In addition, information about the countries is available in a separate spreadsheet “dictionary.csv”, including the IOC Country Code, Population, and GDP per capita.
olymp <- read.csv("../data/winter.csv")
gdp <- read.csv("../data/dictionary.csv")
olymp$Year <- as.Date(strptime(olymp$Year, format = "%Y"))
colnames(gdp) <- c("Country_Full", "Country", "Population", "GDP.per.Capita")Combine the information in the two spreadsheets winter.csv and dictionary.csv. Because some countries that competed under different designations in the past (e.g. Germany and Russia), I chose to combine Russia with URS, Germany with EUA, FRG and GDR, and Czech Republic with TCH.
A summary of how many winter games each country medaled in, and how many medals of each type the country won can be seen below.
# How many winter game Medal types per country
# Combine RUS and URS, and GER, EUA, FRG, GDR
olymp$Country <- as.character(olymp$Country)
olymp$Country <- ifelse(olymp$Country == "URS", "RUS", olymp$Country) # Russia
olymp$Country <- ifelse(olymp$Country == "EUA", "GER", olymp$Country) # Germany
olymp$Country <- ifelse(olymp$Country == "FRG", "GER", olymp$Country)
olymp$Country <- ifelse(olymp$Country == "GDR", "GER", olymp$Country)
olymp$Country <- ifelse(olymp$Country == "TCH", "CZE", olymp$Country) # Czech
olymp$Country <- as.factor(olymp$Country)
descriptive1 <- olymp %>%
mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
group_by(Country, Medal) %>%
mutate(medal_yr_cnt = length(Medal)) %>%
select(Country, Medal, medal_yr_cnt) %>%
unique() %>%
spread(key = Medal, value = medal_yr_cnt) %>%
ungroup() %>%
select(Country, Gold, Silver, Bronze)
descriptive1[is.na(descriptive1)] <- 0
# How many years in Olympics
descriptive2 <- olymp %>%
select(Country, Year) %>%
group_by(Country) %>%
unique() %>%
summarize(num_olymp = n())
# Combination -- Number of Olympics and Medals per Country
descriptive <- inner_join(descriptive1, descriptive2, by = "Country")
descriptive$total_medals <- rowSums(descriptive[, c(2,3,4)])
# Joining GDP information for later
descriptive_gdp <- inner_join(descriptive, gdp, by = "Country")
head(descriptive)## # A tibble: 6 x 6
## Country Gold Silver Bronze num_olymp total_medals
## <fct> <dbl> <dbl> <dbl> <int> <dbl>
## 1 AUS 5.00 3.00 7.00 6 15.0
## 2 AUT 79.0 98.0 103 22 280
## 3 BEL 2.00 4.00 7.00 4 13.0
## 4 BLR 6.00 4.00 5.00 6 15.0
## 5 BUL 1.00 2.00 3.00 4 6.00
## 6 CAN 315 203 107 22 625
# Who are the top 10 medal producers over time?
top10 <- descriptive %>%
arrange(desc(total_medals)) %>%
mutate(rank = row_number()) %>%
filter(rank <= 10)
head(top10)## # A tibble: 6 x 7
## Country Gold Silver Bronze num_olymp total_medals rank
## <fct> <dbl> <dbl> <dbl> <int> <dbl> <int>
## 1 RUS 344 187 172 15 703 1
## 2 USA 167 319 167 22 653 2
## 3 GER 226 208 203 20 637 3
## 4 CAN 315 203 107 22 625 4
## 5 NOR 159 171 127 22 457 5
## 6 FIN 66.0 147 221 22 434 6
# Calculate how many medals per year
medalsyear <- olymp %>%
group_by(Year, Country) %>%
filter(Country %in% top10$Country) %>%
summarize(medal_yr_cnt = length(Medal))
# Split by gold, silver, bronze medals, per year
medals_year <- olymp %>%
mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
group_by(Year, Country, Medal) %>%
summarize(medal_yr_cnt = length(Medal))
head(medals_year)## # A tibble: 6 x 4
## # Groups: Year, Country [4]
## Year Country Medal medal_yr_cnt
## <date> <fct> <fct> <int>
## 1 1924-02-18 AUT Gold 3
## 2 1924-02-18 AUT Silver 1
## 3 1924-02-18 BEL Bronze 5
## 4 1924-02-18 CAN Gold 9
## 5 1924-02-18 FIN Gold 4
## 6 1924-02-18 FIN Silver 8
# filter for only countries that have at least 100 medals
medals100 <- olymp %>%
group_by(Country) %>%
summarize(total_medals = length(Medal)) %>%
filter(total_medals >= 100)
head(medals100)## # A tibble: 6 x 2
## Country total_medals
## <fct> <int>
## 1 AUT 280
## 2 CAN 625
## 3 CZE 233
## 4 FIN 434
## 5 FRA 152
## 6 GER 637
I propose to use the following visualization for a look at the medals won by the top 10 medal-winning countries over time. I think the plot is clear in that it uses familiar colors (gold, silver, bronze) and doesn’t totally overwhelm the reader with information. While I actually liked my second plot (below) better, I can’t seem to run the code any longer without a strange error that I’ll fix within the week.
# Split by Country, over time
#png("../fig/medcountry_time.png")
medcountry <- medals_year %>%
filter(Country %in% top10$Country) %>%
ggplot(aes(x = Year, y = medal_yr_cnt, na.rm = TRUE)) +
geom_jitter(aes(color = Medal)) +
scale_color_manual(values=c("gold","grey73","darkgoldenrod4")) +
labs(x = "", y = "# Medals Won") +
facet_wrap(~Country) +
theme_light() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(strip.background = element_rect(fill = "lightskyblue2"))
medcountry The below plot worked for many days until it didn’t anymore. I would love to send this one to the editor, but need to diagnose what went wrong first.
# The winning "over time" plot
# medals_year %>%
# ggplot(aes(x = Year, y = medal_yr_cnt, na.rm = TRUE)) +
# geom_area(aes( fill = Medal), alpha = 0.8) +
# scale_fill_manual(values=c("gold","grey73","darkgoldenrod4")) +
# labs(x = "Year", y = "Medals Won",
# title = "Olympic Medals by Country",
# subtitle = "Winter Olympics 1924 - 2014") +
# facet_wrap(~Country) +
# theme_light() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
# theme(strip.background = element_rect(fill = "lightskyblue2"))Medals Over Time
To visualize the total amount of medals won by the top 10 medal-winning countries, I produced a lollipop chart for a quick glance, then a bar chart with the same gold, silver and bronze coloring to add more information and make the stacks immediately obvious to the reader.
library(ggalt)
top10 %>%
ggplot(aes(x = reorder(Country, total_medals), y = total_medals)) +
geom_lollipop(point.colour = "lightblue", point.size = 2) +
labs(x = "", y = "Total Medals",
title = "Winter Olympics",
subtitle = "Winter Olympics 1924 - 2014: Top 10 Medal-Winning Countries") +
coord_flip() +
theme_minimal()# Viz for top medal producing countries, at least 100 medals
totalmedals_plot <- olymp %>%
mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
group_by(Country, Medal) %>%
filter(Country %in% medals100$Country) %>%
summarize(total_medals = length(Medal))
#png("../fig/totalmedals.png")
totalmedals <- totalmedals_plot %>%
ggplot(aes(x = reorder(Country, total_medals), y = total_medals,
fill = Medal, group = Medal)) +
geom_bar(stat = "identity") +
geom_text(aes(label = total_medals), position = position_stack(vjust = 0.5),
size = 3, color = "white") +
scale_fill_manual(values=c("gold","grey73","darkgoldenrod4")) +
labs(x = "", y = "",
title = "Olympic Medals Overall",
subtitle = "Winter Olympics 1924 - 2014: Countries with Over 100 Medals") +
theme_light()
totalmedalsI created three separate rankings of success by GDP per capita, population, and total number of medals won. I then chose to visualize this data in a few different ways. First, I visualized the percentage of medals won divided by the population of each country. Countries are ordered by GDP/Capita ranking, thus, countries with high GDP/Capita and low percentage of medals/population are highlighted in yellow. LUX, for instance, has the highest GDP/Capita yet a very low percentage of medals/population. SUI and NOR’s success makes sense, given they have high GDP/Capita and high percentage of medals/population. The USA, however, could do better!
# Data prep
by_pop_gdp <- descriptive_gdp %>%
arrange(desc(total_medals)) %>%
mutate(medal_rank = row_number()) %>%
arrange(desc(GDP.per.Capita), desc(total_medals)) %>%
mutate(gdp_medal_rank = row_number()) %>%
arrange(desc(Population), desc(total_medals)) %>%
mutate(pop_medal_rank = row_number()) %>%
filter(medal_rank <10 | gdp_medal_rank <10 |pop_medal_rank <10)
head(by_pop_gdp)## # A tibble: 6 x 12
## Country Gold Silver Bronze num_olymp total_medals Country_Full
## <chr> <dbl> <dbl> <dbl> <int> <dbl> <fct>
## 1 CHN 16.0 30.0 36.0 7 82.0 China
## 2 USA 167 319 167 22 653 United States
## 3 RUS 344 187 172 15 703 Russia
## 4 JPN 17.0 22.0 24.0 12 63.0 Japan
## 5 GER 226 208 203 20 637 Germany
## 6 FRA 36.0 35.0 81.0 21 152 France
## # ... with 5 more variables: Population <int>, GDP.per.Capita <dbl>,
## # medal_rank <int>, gdp_medal_rank <int>, pop_medal_rank <int>
# GDP Viz
# by_pop_gdp %>%
# ggplot(aes(x = reorder(Country, gdp_medal_rank), y = total_medals)) +
# geom_bar(aes(fill = total_medals < 20), stat = "identity") +
# scale_fill_manual(values=c("grey","gold")) +
# labs(x = "Country in Order of GDP/Capita Ranking", y = "Medals Won Overall",
# title = "Some Countries Should Be Doing Better",
# subtitle = "Winter Olympic Medals by Countries in Order of Highest GDP/Capita") +
# theme_light() +
# theme(legend.position = "none")
# GDP Viz
by_pop_gdp %>%
ggplot(aes(x = reorder(Country, gdp_medal_rank), y = (total_medals/Population * 100))) +
geom_bar(aes(fill = (total_medals/Population* 100) < 0.0025), stat = "identity") +
scale_fill_manual(values=c("grey","gold")) +
labs(x = "Country in Order of GDP/Capita Ranking", y = "% of Medal Winners/Pop",
title = "Some Countries Should Be Doing Better",
subtitle = "Winter Olympic Medals/Population of Countries") +
theme_light() +
theme(legend.position = "none") The below viz shows the countries lined up by population size and compares how many medals they’ve won in total. Countries with the fewest medals are “shamed” by being highlighted in yellow.
# Pop Viz
by_pop_gdp %>%
ggplot(aes(x = reorder(Country, pop_medal_rank), y = total_medals)) +
geom_bar(aes(fill = total_medals < Population/500000), stat = "identity") +
scale_fill_manual(values=c("grey","gold")) +
labs(x = "Country in Order of Largest Population", y = "Medals Won Overall",
title = "Some Countries Should Be Doing Better",
subtitle = "Countries that have enough people to win more medals") +
theme_light() +
theme(legend.position = "none")I adjusted the viz such that we can see the countries with the highest GDP/Capita and largest populations are visualized with how many medals they’ve won. In this case, larger bubbles mean more overall medals won. I’d argue that it doesn’t make a ton of sense to compare GDP/Capita to Population, so I’d scrap this.
by_pop_gdp %>%
ggplot(aes(x = log(Population), y = log(GDP.per.Capita))) +
geom_count(aes(color = Country, size = total_medals, fill = Country, alpha = 0.7), show.legend = FALSE) +
scale_size_area(max_size = 20) +
geom_text(aes(label = Country), check_overlap = TRUE, size = 3) +
theme_light() +
theme(axis.text = element_blank()) +
theme(axis.ticks = element_blank()) +
labs(x = expression(atop("Population", paste(symbol('\256')))),
y = expression(atop("GDP/Capita",paste(symbol('\256')))),
title = "Some Countries Should Be Doing Better",
subtitle = "Winter Olympics 1924 - 2014: Medals per Pop & GDP of Country",
caption = "Bigger Bubbles = More Medals Won") +
theme(axis.title.y = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 12)) The following viz points out China, specifically, as a country with both a lot of people and a lot of money, but we can see from the size of its bubble that China is not a very big medal producer. They could also be doing better!
by_pop_gdp %>%
ggplot(aes(x = log(Population), y = log(GDP.per.Capita * Population))) +
geom_count(aes(color = Country, size = total_medals, fill = Country, alpha = 0.7), show.legend = FALSE) +
scale_size_area(max_size = 20) +
geom_text(aes(label = Country), check_overlap = TRUE, size = 3) +
theme_light() +
theme(axis.text = element_blank()) +
theme(axis.ticks = element_blank()) +
labs(x = expression(atop("Population", paste(symbol('\256')))),
y = expression(atop("GDP",paste(symbol('\256')))),
title = "China Should Be Doing Better",
subtitle = "Winter Olympics 1924 - 2014: Medals per Pop & GDP of Country",
caption = "Bigger Bubbles = More Medals Won") +
theme(axis.title.y = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 12)) This final visualization is the second one I would recommend to my editor. I feel the x and y measurements make sense with each other, and the bubbles are fairly aesthetically pleasing. It’s clear that Norway and Finland do well for themselves given the large amount of medals they’ve won despite their smaller populations.
#png("../fig/norwaymedals.png")
norway <- by_pop_gdp %>%
ggplot(aes(x = log(Population), y = total_medals/Population *100)) +
geom_count(aes(color = Country, size = total_medals,
fill = Country, alpha = 0.7), show.legend = FALSE) +
#scale_fill_manual(values=c("purple","yellow")) +
scale_size_area(max_size = 20) +
geom_text(aes(label = Country), check_overlap = TRUE, size = 3) +
theme_classic() +
theme(axis.text = element_blank()) +
theme(axis.ticks = element_blank()) +
labs(x = expression(atop("Population", paste(symbol('\256')))),
y = expression(atop("% of Medals per Pop",paste(symbol('\256')))),
title = "Norway & Finland Citizens Dominate",
subtitle = "Winter Olympics 1924 - 2014: Percentage of Medals Won per Population",
caption = "Bigger Bubbles = More Medals Won Overall") +
theme(axis.title.y = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 12))
#geom_encircle(data = subset(by_pop_gdp, (total_medals/Population *100 > 0.0040)))
norwayI calculated whether the countries won more medals when they hosted Winter Olympics as opposed to when they were outside participants. To do so, I downloaded necessary country host information from Wikipedia below.
library(rvest)
library(stringr)
wiki_hosts <- read_html("https://en.wikipedia.org/wiki/Winter_Olympic_Games")
hosts <- html_table(html_nodes(wiki_hosts, "table")[[5]], fill=TRUE)
hosts <- hosts[-1,1:3]
hosts$city <- str_split_fixed(hosts$Host, n=2, ",")[,1]
hosts$country <- str_split_fixed(hosts$Host, n=2, ",")[,2]
hosts$Year <- as.Date(strptime(hosts$Year, format = "%Y"))
hosts$Year <- format(hosts$Year, '%Y')
medals_year$Year <- format(medals_year$Year, '%Y') # weird issues with year
hosts <- hosts %>%
select(Year, country)
colnames(hosts)[colnames(hosts) == "country"] <- "Host_Country"
# Join tables for Full Country Name, Year, number of medals won and whether they hosted
hosts <- merge(medals_year, hosts, on = "Year")
hosts <- merge(hosts, descriptive_gdp, on = "Country")
hosts$Host_Country <- as.character(hosts$Host_Country)
hosts$Country_Full <- as.character(hosts$Country_Full)
hosts$Host_Country <- trimws(hosts$Host_Country)
hosts <- hosts %>%
select(Year, Country, Country_Full, Medal, medal_yr_cnt, Host_Country) %>%
mutate(hosted = ifelse(Country_Full == Host_Country, 1, 0)) %>%
spread(key = Medal, value = medal_yr_cnt)
hosts[is.na(hosts)] <- 0
hosts$total_medals <- rowSums(hosts[, c(6,7,8)])I began with a wrapped bar chart for each country that has ever hosted in the Olympics. I chose stark colors (grey and blue) so the reader can easily see a country’s total number of medals won when they were hosts versus when they were not hosts. It’s clear from the visualization that most countries do exceedingly better when they host.
#png("../fig/hostplot1.png")
hosts %>%
filter(Country_Full %in% hosts$Host_Country) %>%
ggplot(aes(x = Year, y = total_medals)) +
geom_histogram(aes(fill = as.factor(hosted)), stat = "identity") +
facet_wrap(~Country_Full, nrow = 4) +
theme_minimal() +
theme(axis.text = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(strip.background = element_rect(fill = "lightskyblue2")) +
scale_fill_manual(values=c("gray87","lightseagreen"), labels = c("Medals; Not Hosting", "Host")) +
labs(x = "1924 - 2014", y = "",
title = "Home Court Advantage",
subtitle = "Host Countries of the Winter Olympics 1924 - 2014",
fill = "Medals Won Per Year") +
theme(text = element_text(size = 11))A downside of the next plot I created is that it’s slightly harder to compare the amount of medals won each year, but I find it particularly aesthetically pleasing. This plot includes only countries that have hosted an Olympics and shows how many medals each country won at each winter Olympics. Tiles outlined in black indicate that the country hosted that year.
#png("../fig/hostplot2.png")
palette <- colorRampPalette(brewer.pal(9, 'GnBu'), space='Lab', bias = 5)
hosts$hosted <- as.factor(hosts$hosted)
home.court <- hosts %>%
filter(Country_Full %in% hosts$Host_Country) %>%
ggplot(aes(x = Year, y = Country)) +
geom_raster(aes(fill = total_medals)) +
geom_tile(aes(color = hosted), fill = "#00000000", size = 1, show.legend = FALSE) +
theme_light() +
scale_fill_gradientn(colors = palette(9)) +
scale_color_manual(values = c('#00000000', 'black')) +
labs(x="", y="", fill="Medals",
title = "Home Court Advantage",
subtitle = "Host Countries of the Winter Olympics 1924 - 2014",
caption = "Hosting country indicated by outline. ") +
theme(
legend.text=element_text(color="grey20"),
axis.text.x=element_text(size=8),
axis.ticks.y=element_blank(),
axis.ticks.x = element_blank(),
panel.grid=element_blank(),
plot.margin = unit(c(.5,1,0.3,1), "cm")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
home.courtFirst, I wanted to compare USA and Canadian Olympic performance especially because of their hockey rivalry. I adjusted the dataset to account for the fact that hockey is a large team sport and calculated the total number of medals as just one per win rather than per player. I visualized the number of medals won per sport between the countries two different ways. I’d suggest to my editor to scrap the first – I don’t find it as visually appealing as the second.
# calculate the large team sport, hockey, as just one medal per team rather than person
sports_per_game <- olymp %>%
group_by(Country, Year, Sport, Medal) %>%
select(Country, Year, Sport, Medal) %>%
unique() %>%
summarize(num = n()) %>%
group_by(Country, Sport, Medal) %>%
mutate(num_olymp_game = sum(num)) %>%
ungroup() %>%
select(Country, Sport, Medal, num_olymp_game) %>%
unique()
sports <- olymp %>%
mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
group_by(Year, Country, Sport) %>%
group_by(Country, Sport, Medal) %>%
mutate(medal_sport_cnt = as.integer(length(Medal))) %>%
select(Country, Sport, Medal, medal_sport_cnt) %>%
unique() %>%
merge(sports_per_game, on = "Country") %>%
mutate(adj_medals = ifelse(Sport == "Ice Hockey", num_olymp_game, medal_sport_cnt))
# Canada vs. USA Medals
sports %>%
filter(Country == "USA" | Country == "CAN") %>%
group_by(Country, Sport) %>%
summarize(total_medals = sum(adj_medals)) %>%
ggplot(aes(x = Sport, y = total_medals)) +
geom_bar(aes(color = Country, fill = Country), stat = "identity", alpha = 0.8) +
coord_flip() +
labs(x= "",
y= " ",
title = "U.S. vs. Canadian Olympic Medals",
subtitle = "Winter Olympics 1924 - 2014") +
theme_light()I’d recommend using the following visualization to compare Canada and USA’s Olympic performance by sport (discipline, to be exact). USA’s advantage over Canada is clear. To improve this plot, I’d add an interactive overlay to indicate the precise amount of medals per sport layer.
sports %>%
filter(Country == "USA" | Country == "CAN") %>%
group_by(Country, Sport) %>%
summarize(total_medals = sum(adj_medals)) %>%
ggplot(aes(x = Country, y = total_medals)) +
geom_bar(aes(fill = Sport), stat = "identity", alpha = 0.9) +
scale_fill_brewer(palette = "Reds") +
labs(x= "",
y= " ",
title = "U.S. vs. Canadian Olympic Medals",
subtitle = "Winter Olympics 1924 - 2014") +
theme_light()To best visualize the top 10 medal-winning countries, I’d suggest using the visualization below. It provides the reader with even more information but in a (hopefully) digestible way – the reader can tell how many of each medal type each country won overall in each discipline. The top ten countries happen to be best at Skiing and Skating sports as indicated by the darker blues.
# Top 10 Medal Winning Countries Split by Sport
#png("../fig/sports.png")
palette <- colorRampPalette(brewer.pal(9, 'GnBu'), space='Lab', bias = 5)
sports <- sports %>%
filter(Country %in% top10$Country) %>%
ggplot(aes(x = Sport, y = Medal, fill = adj_medals)) +
geom_tile(color = "white") +
facet_grid(~Country) +
scale_fill_gradientn(colors = palette(9)) +
theme_light() +
coord_flip() +
theme(strip.background = element_rect(fill = "lightskyblue2")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(panel.grid=element_blank()) +
labs(x= "",
y= "Medal Types",
title = "Top 10 Medal Producing Countries by Sport",
subtitle="Winter Olympics 1924 - 2014",
fill="# of Medals")
sportsBelow is a visual display of the most successful winter Olympic athletes of all time. Norway tends to produce the top winning winter athletes, which is not surprising given Norway’s clear success in prior plots.
athletes <- olymp %>%
mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
group_by(Athlete, Gender, Medal) %>%
select(Country, Athlete, Discipline, Sport) %>%
mutate(total_medals_per = length(Medal)) %>%
ungroup() %>%
unique()
athletes <- athletes %>%
group_by(Athlete, Gender, Country) %>%
mutate(totalmedals = sum(total_medals_per)) %>%
select(Athlete, Gender, Country, Discipline, Sport, totalmedals) %>%
unique()
athletes$Discipline <- as.factor(athletes$Discipline)
athletes$Athlete <- as.factor(athletes$Athlete)
athletes$totalmedals <- as.integer(athletes$totalmedals)
# top 20 overall
top_20 <- athletes %>%
select(Athlete, Gender, totalmedals, Sport, Discipline) %>%
unique() %>%
arrange(desc(totalmedals))
top_20 <- top_20[-10, ] # ridding of second country, looked up most represented country
top_20 <- top_20[1:20, ]
top_20$Discipline <- as.character(top_20$Discipline)
top_20[3, 6] <- "Multiple Sports"
top_20[4, 6] <- "Multiple Sports"
top_20 <- unique(top_20)
# top 10 female
top_10_F <- athletes %>%
filter(Gender == "Women") %>%
arrange(desc(totalmedals))
top_10_F <- top_10_F[-5, ]
top_10_F <- top_10_F[1:10, ]
# top 10 men
top_10_M <- athletes %>%
filter(Gender == "Men") %>%
arrange(desc(totalmedals))
top_10_M$Discipline <- as.character(top_10_M$Discipline)
top_10_M[3, 4] <- "Multiple Sports"
top_10_M[4, 4] <- "Multiple Sports"
top_10_M <- unique(top_10_M)
top_10_M <- top_10_M[1:10, ]
#png("../fig/athletes.png")
athletes <- top_20 %>%
ggplot(aes(reorder(x = Athlete, totalmedals), y = totalmedals)) +
geom_bar(aes(fill = Country == "NOR"), alpha = 0.6, stat = "identity") +
scale_fill_manual(values=c("grey","red"), labels = c("Other Country", "Norway")) +
coord_flip() +
theme_light() +
labs(x= "",
y= "Overall Medals",
title = "Top 20 Medal-Winning Olympians: Norway Dominates
",
subtitle = "Winter Olympics 1924 - 2014") +
theme(legend.title = element_blank())
athletesI then split the athletes by gender. From the plots, we can see that women dominate in Cross Country Skiing while men dominate in the Biathlon.
#png("../fig/top10F.png")
top10F <- top_10_F %>%
ggplot(aes(reorder(x = Athlete, totalmedals), y = totalmedals)) +
geom_bar(aes(fill = Discipline), alpha = 0.7, stat = "identity") +
coord_flip() +
theme_light() +
theme(legend.position = "bottom") +
labs(x= "",
y= "Total Medals",
title = "Top 10 Female Medal-Winning Olympians",
subtitle = "Winter Olympics 1924 - 2014") +
scale_y_continuous(breaks = pretty_breaks()) +
scale_fill_brewer(palette = "Dark2", name = "Sport")
top10F#png("../fig/top10M.png")
top10M <- top_10_M %>%
ggplot(aes(reorder(x = Athlete, totalmedals), y = totalmedals)) +
geom_bar(aes(fill = Discipline), alpha = 0.7, stat = "identity") +
coord_flip() +
theme_light() +
theme(legend.position = "bottom") +
labs(x= "",
y= "Total Medals",
title = "Top 10 Male Medal-Winning Olympians",
subtitle = "Winter Olympics 1924 - 2014") +
scale_y_continuous(breaks = pretty_breaks()) +
scale_fill_brewer(palette = "Dark2", name = "Sport")
top10M #dev.off()
#source("../lib/multiplot.R")
# layout <- matrix(c(1, 1, 2, 2, 1, 1, 2, 2), 2, 4, byrow = TRUE)
# multiplot(top10F, top10M, layout = layout) For my first interactive plot, I chose to add additional information to my bubble chart that represented the number of medals per population of each country. Prior to adding interactivity, the scale for total medals won was unclear. The below plot allows the user to hover over each bubble and get a clearer picture of how many overall medals the country won on a scale .
library(plotly)
# Had to rearrange the ggplot to get rid of arrow element
colnames(by_pop_gdp)[colnames(by_pop_gdp) == "total_medals"] <- "Total_Medals"
colnames(by_pop_gdp)[colnames(by_pop_gdp) == "Country"] <- "Code"
colnames(by_pop_gdp)[colnames(by_pop_gdp) == "Country_Full"] <- "Country"
norway2 <- by_pop_gdp %>%
mutate(medspop = Total_Medals/Population * 100) %>%
ggplot(aes(x = log(Population), y = Total_Medals/Population *100)) +
geom_count(aes(color = Code, size = Total_Medals,
fill = Code, alpha = 0.7, label = Country), show.legend = FALSE) +
labs(x = "Population",
y = "% of Medals per Pop",
title = "Norway & Finland Citizens Dominate",
subtitle = "Winter Olympics 1924 - 2014: Percentage of Medals Won per Population",
caption = "Bigger Bubbles = More Medals Won Overall") +
scale_size_area(max_size = 20) +
geom_text(aes(label = Code), check_overlap = TRUE, size = 3) +
theme_classic() +
theme(axis.text = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.title.y = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 12)) +
theme(legend.position = 'none')
plotly1 <- ggplotly(p = norway2, tooltip = c("Country", "Total_Medals"))
plotly1#api_create(plotly1, filename = "norway_pop-plotly")My second interactive plot (created with HighCharter) I modeled off of my first heat map host country visualization. The user can hover over each heatmap tile to see whether the country hosted the Olympics that particular year and how many medals the country won.
#library("viridis")
library(highcharter)
x <- c("Total Medals", "Host", "Year")
y <- sprintf("{point.%s}", c("total_medals", "hosted", "Year"))
tltip <- tooltip_table(x, y)
hosts2 <- hosts %>%
filter(Country_Full %in% Host_Country)
hosts2$hosted <- ifelse(hosts2$hosted == 1, "Yes", "No")
hosthc <- hchart(hosts2, "heatmap", hcaes(x = Year, y = Country_Full, value = total_medals)) %>%
hc_colorAxis(stops = color_stops(10, (palette(10))),
type = "logarithmic") %>%
hc_tooltip(useHTML = TRUE, pointFormat = tltip) %>%
hc_title(text = "Home Court Advantage") %>%
hc_legend(layout = "vertical", verticalAlign = "top",
align = "right", valueDecimals = 0) %>%
hc_size(height = 800)
hosthcI chose to create a data table that provides the reader with information about each contending country in the Winter Olympics, including the country’s population, how many winter games the country has competed and medaled in, and the number of Gold, Silver and Bronze medals won. The small colored bars are supposed to serve as a visual representation of the proportion of medals that were Gold, Silver and Bronze.
library(DT)
descriptive_pop <- descriptive_gdp %>%
select(Country_Full, Population, num_olymp, Gold, Silver, Bronze, total_medals)
descriptive_pop$total_medals <- as.integer(descriptive_pop$total_medals)
## Borrowed the sketch Code from: https://rstudio.github.io/DT/
sketch = htmltools::withTags(table(
class = 'display',
thead(
tr(
th(rowspan = 2, 'Country'),
th(colspan = 2, 'Olympics'),
th(colspan = 4, 'Medals')
),
tr(
lapply(c('Population', 'Number of Games', 'Gold', 'Silver', 'Bronze', 'Total'), th)
)
)
))
## Data Table
options(DT.options = list(pageLength = 10, autoWidth = TRUE, columnDefs = list(list(width = '80px', targets = c(1, 2, 3, 4, 5, 6)))))
desc_dt <- descriptive_pop %>%
datatable(rownames = FALSE, filter = list(position = "top"),
colnames = c("Country" = "Country_Full", "# Winter Olympics Medaled" = "num_olymp",
"# Gold Medals" = "Gold", "# Silver Medals" = "Silver",
"# Bronze Medals" = "Bronze",
"# Total Medals" = "total_medals"),
width = 700, height = 600,
options = list(language = list(sSearch = "Filter:")),
container = sketch,
caption = htmltools::tags$caption(
# style borrowed from same R Studio Github above
style = 'caption-side: bottom; text-align: center;',
'Table 1: ', htmltools::em('Winter Olympics 1924 - 2014.' ))) %>%
formatStyle('Country', fontWeight = 'bold') %>%
formatStyle(columns = c(2,3,4,5,6,7), fontSize = '80%') %>%
formatStyle('# Total Medals', fontWeight = 'bold', backgroundColor = "lightblue") %>%
formatStyle('# Gold Medals',
background = styleColorBar(range(descriptive_pop$total_medals), 'gold'),
backgroundSize = '90% 80%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center') %>%
formatStyle('# Silver Medals',
background = styleColorBar(range(descriptive_pop$total_medals), 'silver'),
backgroundSize = '90% 80%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center') %>%
formatStyle('# Bronze Medals',
background = styleColorBar(range(descriptive_pop$total_medals), 'brown'),
backgroundSize = '90% 80%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center')
desc_dt